Data Descriptives: DV

# Stack the female and male LFP columns into one long data frame tagged by
# gender, so both distributions can share a single histogram.
lfp <- rbind(
  data.frame(lfp = df$lfp_female, gender = 'F'),
  data.frame(lfp = df$lfp_male, gender = 'M')
)

# Per-gender means, drawn below as vertical reference lines.
avg_lfp <- mean(df$lfp_female)
avg_lfp_m <- mean(df$lfp_male)

# Overlaid semi-transparent histograms (position = "identity" keeps the two
# genders from being stacked on top of each other).
ggplot(lfp, aes(x = lfp, fill = gender)) +
  geom_histogram(alpha = 0.5, position = "identity") +
  geom_vline(xintercept = avg_lfp_m) +
  geom_vline(xintercept = avg_lfp) +
  scale_x_continuous(breaks = seq(0, 100, 10)) +
  labs(title = 'LFP by gender in Canada  by census tract', x = 'LFP (%)')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Mean LFP gap, marked on the histogram with a red vertical line.
avg_gap <- mean(df$lfp_gap)

# Histogram of the gap in 2-unit bins.
ggplot(df_no_geom, aes(lfp_gap)) +
  geom_histogram(color = "black", fill = "white", binwidth = 2) +
  geom_vline(xintercept = avg_gap, color = 'red') +
  labs(title = 'LFP Gap (M-F) in Canada by census tract', x = 'percentage points')

Data Descriptives: IV

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Study 1: Gender differences in spatial behaviour

Commuting Modes

\[\begin{aligned} H_0 &: \text{driver}_{F} \geq \text{driver}_M \\ H_1 &: \text{driver}_{F} < \text{driver}_M \end{aligned}\]

# One-sided paired t-test: is the mean of (female - male) driver values below 0?
# The vectors are paired element by element (presumably one row per census
# tract in both data frames; confirm the row order matches).
# `var.equal` is not passed: t.test() only uses it for unpaired two-sample
# tests, so it has no effect when paired = TRUE.
t.test(
  commute_modes_female$driver,
  commute_modes_male$driver,
  alternative = 'less',
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_modes_female$driver and commute_modes_male$driver
## t = -81.425, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##      -Inf -150.698
## sample estimates:
## mean of the differences 
##               -153.8055

Reject the null hypothesis; fewer women than men commute as drivers.

\[\begin{aligned} H_0 &: \text{transit}_{F} \leq \text{transit}_M \\ H_1 &: \text{transit}_{F} > \text{transit}_M \end{aligned}\]

# One-sided paired t-test: is the mean of (female - male) transit values above 0?
# The vectors are paired element by element (presumably one row per census
# tract in both data frames; confirm the row order matches).
# `var.equal` is not passed: t.test() only uses it for unpaired two-sample
# tests, so it has no effect when paired = TRUE.
t.test(
  commute_modes_female$transit,
  commute_modes_male$transit,
  alternative = 'greater',
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_modes_female$transit and commute_modes_male$transit
## t = 48.281, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  49.17677      Inf
## sample estimates:
## mean of the differences 
##                50.91152

Reject null hypothesis; more women commute by public transit than men.

Commuting Durations

Paired t-tests: in every commute-duration bracket, the difference between female and male commuters is statistically significant.

# Two-sided paired t-test on the t15 column (female vs. male).
# `var.equal` is not passed: t.test() only uses it for unpaired two-sample
# tests, so it has no effect when paired = TRUE.
t.test(
  commute_time_female$t15,
  commute_time_male$t15,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t15 and commute_time_male$t15
## t = 40.692, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  37.10181 40.85763
## sample estimates:
## mean of the differences 
##                38.97972
# Two-sided paired t-test on the t15to29 column (female vs. male).
# `var.equal` is not passed: t.test() only uses it for unpaired two-sample
# tests, so it has no effect when paired = TRUE.
t.test(
  commute_time_female$t15to29,
  commute_time_male$t15to29,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t15to29 and commute_time_male$t15to29
## t = -20.524, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -23.36934 -19.29425
## sample estimates:
## mean of the differences 
##                -21.3318
# Two-sided paired t-test on the t30to44 column (female vs. male).
# `var.equal` is not passed: t.test() only uses it for unpaired two-sample
# tests, so it has no effect when paired = TRUE.
t.test(
  commute_time_female$t30to44,
  commute_time_male$t30to44,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t30to44 and commute_time_male$t30to44
## t = -56.133, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -51.19870 -47.74324
## sample estimates:
## mean of the differences 
##               -49.47097
# Two-sided paired t-test on the t45to59 column (female vs. male).
# `var.equal` is not passed: t.test() only uses it for unpaired two-sample
# tests, so it has no effect when paired = TRUE.
t.test(
  commute_time_female$t45to59,
  commute_time_male$t45to59,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t45to59 and commute_time_male$t45to59
## t = -30.318, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -19.02849 -16.71713
## sample estimates:
## mean of the differences 
##               -17.87281
# Two-sided paired t-test on the t60 column (female vs. male).
# `var.equal` is not passed: t.test() only uses it for unpaired two-sample
# tests, so it has no effect when paired = TRUE.
t.test(
  commute_time_female$t60,
  commute_time_male$t60,
  paired = TRUE
)
## 
##  Paired t-test
## 
## data:  commute_time_female$t60 and commute_time_male$t60
## t = -34.61, df = 5424, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -28.42807 -25.38022
## sample estimates:
## mean of the differences 
##               -26.90415

Study 2: determinants of LFP

# Assemble the regression data: the predictor columns named in iv_colnames,
# plus the outcome column lfp_female.
df_vars <- df_no_geom[iv_colnames]
df_vars[["lfp_female"]] <- df_no_geom[["lfp_female"]]

# OLS of female LFP on every other column of df_vars.
model_all <- lm(
  lfp_female ~ .,
  data = df_vars
)
summary(model_all)
## 
## Call:
## lm(formula = lfp_female ~ ., data = df_vars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.204  -2.433   0.017   2.356  47.163 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -21.365109   0.816814 -26.157  < 2e-16 ***
## pca1_stock                    -0.316397   0.039223  -8.067 8.82e-16 ***
## med_hh_income_1000            -0.015140   0.003480  -4.351 1.38e-05 ***
## avg_rooms_per_dwelling         0.252348   0.089990   2.804  0.00506 ** 
## percent_hh_with_children      -0.023203   0.005731  -4.049 5.22e-05 ***
## lfp_male                       0.990068   0.008570 115.532  < 2e-16 ***
## percent_drivers_female         0.305795   0.012654  24.166  < 2e-16 ***
## percent_publictransit_female   0.026553   0.003933   6.751 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.948 on 5417 degrees of freedom
## Multiple R-squared:  0.7718, Adjusted R-squared:  0.7715 
## F-statistic:  2617 on 7 and 5417 DF,  p-value: < 2.2e-16
# Residuals in observation order.
plot(residuals(model_all), ylab = 'residuals')

# Residuals against the model's fitted values. The x-axis must be fitted(),
# not the observed response: residuals are correlated with the observed
# response by construction, which would show a spurious trend.
plot(fitted(model_all), residuals(model_all), ylab = 'residuals', xlab = 'fitted value')

# Normal QQ plot of the residuals with its reference line.
qqnorm(residuals(model_all), main = 'Residual QQ Plot (normal dist)')
qqline(residuals(model_all))

# Breusch-Pagan test for heteroskedasticity in model_all (studentized variant,
# which is the default and is stated explicitly here).
bptest(model_all, studentize = TRUE)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_all
## BP = 181.45, df = 7, p-value < 2.2e-16
# Refit the full specification without pca1_stock, then compare the two
# nested fits with anova().
model_no_sndi <- lm(
  lfp_female ~ . - pca1_stock,
  data = df_vars
)
anova(model_no_sndi, model_all)

Multilevel model

# Attach the cma_uid identifier that serves as the grouping factor.
df_vars$cma_uid <- df$cma_uid

# Mixed model: fixed effects for the listed predictors (lfp_male is not among
# them) plus a random intercept for each cma_uid.
model_cma <- lmer(
  lfp_female ~ pca1_stock + med_hh_income_1000 + avg_rooms_per_dwelling +
    percent_hh_with_children + percent_drivers_female +
    percent_publictransit_female + (1 | cma_uid),
  data = df_vars
)
summary(model_cma)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## lfp_female ~ pca1_stock + med_hh_income_1000 + avg_rooms_per_dwelling +  
##     percent_hh_with_children + percent_drivers_female + percent_publictransit_female +  
##     (1 | cma_uid)
##    Data: df_vars
## 
## REML criterion at convergence: 36773.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.5525 -0.5566  0.0669  0.6184  5.7482 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  cma_uid  (Intercept)  7.885   2.808   
##  Residual             50.156   7.082   
## Number of obs: 5425, groups:  cma_uid, 49
## 
## Fixed effects:
##                                Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)                   4.550e+01  1.220e+00  1.558e+03  37.284  < 2e-16
## pca1_stock                   -5.107e-01  7.517e-02  5.355e+03  -6.794 1.21e-11
## med_hh_income_1000            1.737e-01  6.473e-03  4.190e+03  26.832  < 2e-16
## avg_rooms_per_dwelling       -2.912e+00  1.736e-01  5.033e+03 -16.772  < 2e-16
## percent_hh_with_children      1.047e-01  1.155e-02  5.403e+03   9.060  < 2e-16
## percent_drivers_female        3.686e-01  2.380e-02  5.418e+03  15.489  < 2e-16
## percent_publictransit_female  2.136e-02  7.206e-03  5.418e+03   2.965  0.00304
##                                 
## (Intercept)                  ***
## pca1_stock                   ***
## med_hh_income_1000           ***
## avg_rooms_per_dwelling       ***
## percent_hh_with_children     ***
## percent_drivers_female       ***
## percent_publictransit_female ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) pc1_st m___10 avg___ prc___ prcnt_d_
## pca1_stock   0.167                                     
## md_hh__1000  0.190  0.091                              
## avg_rms_pr_ -0.222 -0.282 -0.677                       
## prcnt_hh_w_ -0.006 -0.154 -0.013 -0.463                
## prcnt_drvr_ -0.728 -0.077 -0.024 -0.224  0.147         
## prcnt_pblc_ -0.327  0.020  0.120 -0.014 -0.083 -0.020
# Residuals in observation order.
plot(residuals(model_cma), ylab = 'residuals')

# Residuals against the model's fitted values. The x-axis must be fitted(),
# not the observed response: residuals are correlated with the observed
# response by construction, which would show a spurious trend.
plot(fitted(model_cma), residuals(model_cma), ylab = 'residuals', xlab = 'fitted value')

# Normal QQ plot of the residuals with its reference line.
qqnorm(residuals(model_cma), main = 'Residual QQ Plot (normal dist)')
qqline(residuals(model_cma))

Gender Difference in LFP

# Outcome for the gap model: male LFP minus female LFP.
df_vars$lfp_gap <- with(df_vars, lfp_male - lfp_female)

# OLS of the gap on the remaining columns. Both LFP columns (the gap is built
# from them) and the cma_uid identifier are removed from the right-hand side.
model_gap <- lm(
  lfp_gap ~ . - lfp_female - lfp_male - cma_uid,
  data = df_vars
)
summary(model_gap)
## 
## Call:
## lm(formula = lfp_gap ~ . - lfp_female - lfp_male - cma_uid, data = df_vars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -47.333  -2.333  -0.028   2.428  17.017 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  22.042585   0.570525  38.636  < 2e-16 ***
## pca1_stock                    0.314628   0.039194   8.027 1.21e-15 ***
## med_hh_income_1000            0.017064   0.003059   5.579 2.54e-08 ***
## avg_rooms_per_dwelling       -0.282646   0.086112  -3.282  0.00104 ** 
## percent_hh_with_children      0.024156   0.005672   4.259 2.09e-05 ***
## percent_drivers_female       -0.305210   0.012644 -24.139  < 2e-16 ***
## percent_publictransit_female -0.026643   0.003933  -6.775 1.38e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.948 on 5418 degrees of freedom
## Multiple R-squared:  0.1459, Adjusted R-squared:  0.1449 
## F-statistic: 154.2 on 6 and 5418 DF,  p-value: < 2.2e-16
# Residuals in observation order.
plot(residuals(model_gap), ylab = 'residuals')

# Residuals against the model's fitted values. The x-axis must be fitted();
# df_vars$lfp_female is neither the fitted values nor this model's response
# (lfp_gap), so it does not match the 'fitted value' label.
plot(fitted(model_gap), residuals(model_gap), ylab = 'residuals', xlab = 'fitted value')

# Normal QQ plot of the residuals with its reference line.
qqnorm(residuals(model_gap), main = 'Residual QQ Plot (normal dist)')
qqline(residuals(model_gap))

Montreal

## 
## Call:
## lm(formula = lfp_gap ~ . - lfp_male, data = mtl_data_reg)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.478  -2.737   0.170   2.877  13.040 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  28.33066    2.25917  12.540  < 2e-16 ***
## pca1_stock                    0.96955    0.20921   4.634 4.67e-06 ***
## med_hh_income_1000            0.03517    0.01864   1.887 0.059811 .  
## avg_rooms_per_dwelling       -2.61547    0.54491  -4.800 2.15e-06 ***
## percent_hh_with_children      0.21974    0.02809   7.824 3.55e-14 ***
## percent_drivers_female       -0.13353    0.03726  -3.584 0.000374 ***
## percent_publictransit_female -0.22254    0.03488  -6.381 4.31e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.47 on 459 degrees of freedom
## Multiple R-squared:  0.2874, Adjusted R-squared:  0.2781 
## F-statistic: 30.86 on 6 and 459 DF,  p-value: < 2.2e-16

## 
##  studentized Breusch-Pagan test
## 
## data:  model_mtl
## BP = 14.015, df = 6, p-value = 0.02947

Toronto CSD

## 
## Call:
## lm(formula = lfp_gap ~ . - lfp_male, data = to_data_reg)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.3863  -2.3234   0.0213   2.2864  13.7066 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  26.190825   1.994000  13.135  < 2e-16 ***
## pca1_stock                    0.645885   0.145511   4.439 1.09e-05 ***
## med_hh_income_1000            0.040870   0.009039   4.522 7.50e-06 ***
## avg_rooms_per_dwelling       -1.266213   0.273926  -4.622 4.72e-06 ***
## percent_hh_with_children      0.134199   0.018958   7.079 4.39e-12 ***
## percent_drivers_female       -0.184561   0.033647  -5.485 6.27e-08 ***
## percent_publictransit_female -0.224325   0.029703  -7.552 1.76e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.861 on 558 degrees of freedom
## Multiple R-squared:  0.2836, Adjusted R-squared:  0.2759 
## F-statistic: 36.82 on 6 and 558 DF,  p-value: < 2.2e-16